vignettes/example-gallery-05-scatter-plots.Rmd
example-gallery-05-scatter-plots.RmdThis document is adapted from the Bar Charts section of the Altair Example Gallery.
Our first step is to set up our environment:
# devtools::install_github("vegawidget/altair")
library("altair")
library("tibble")
library("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library("purrr")
library("tidyr")
vega_data <- import_vega_data()glimpse(vega_data$barley())
#> Observations: 120
#> Variables: 4
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Gra…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Man…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.966…chart <-
alt$Chart(vega_data$barley())$
mark_point()$
encode(
x = alt$X("median(yield)", scale=alt$Scale(zero=FALSE)),
y = alt$Y(
"variety",
sort = alt$EncodingSortField(
field = "yield",
op = "median",
order = "descending"
),
scale = alt$Scale(rangeStep = 20)
),
color = "year:N",
row = "site"
)
chartmovies <- vega_data$movies()
glimpse(movies)
#> Observations: 3,201
#> Variables: 16
#> $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary F…
#> $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "Ch…
#> $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "Fin…
#> $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8, 5…
#> $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 353…
#> $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R", "…
#> $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "Dra…
#> $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 1000000,…
#> $ Release_Date <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 199…
#> $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 25,…
#> $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, N…
#> $ Source <list> [NULL, NULL, NULL, NULL, "Original Scree…
#> $ Title <list> ["The Land Girls", "First Love, Last Rit…
#> $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, N…
#> $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819, 2…
#> $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521, 2…glimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 1…
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,…
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 39…
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 19…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, Na…
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 320…
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA"…
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312,…
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01…This example shows how layering can be used to build a plot. This dataset tracks miles driven per capita along with gas prices annually from 1956 to 2010. It is based on the May 2, 2010 New York Times article ‘Driving Shifts Into Reverse’. See this reference.
glimpse(vega_data$driving())
#> Observations: 55
#> Variables: 4
#> $ gas <dbl> 2.38, 2.40, 2.26, 2.31, 2.27, 2.25, 2.22, 2.12, 2.11, 2.14…
#> $ miles <dbl> 3675, 3706, 3766, 3905, 3935, 3977, 4085, 4218, 4369, 4538…
#> $ side <chr> "left", "right", "bottom", "top", "right", "bottom", "righ…
#> $ year <dbl> 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965…lines <-
alt$Chart(vega_data$driving())$
mark_line()$
encode(
x = alt$X("miles", scale = alt$Scale(zero = FALSE)),
y = alt$Y("gas", scale = alt$Scale(zero = FALSE)),
order="year"
)
points <-
alt$Chart(vega_data$driving())$
mark_circle()$
encode(
alt$X("miles", scale = alt$Scale(zero = FALSE)),
alt$Y("gas", scale = alt$Scale(zero = FALSE))
)
chart <- (lines + points)
chartThis example shows how to make a dot-dash plot presented in Edward Tufte’s book Visual Display of Quantitative Information on page 133. This example is based on g3o2’s block.
glimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 1…
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,…
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 39…
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 19…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, Na…
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 320…
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA"…
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312,…
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01…cars <- vega_data$cars()
brush <- alt$selection(type = "interval")
tick_axis <- alt$Axis(labels=FALSE, domain=FALSE, ticks=FALSE)
tick_axis_notitle <-
alt$Axis(labels=FALSE, domain=FALSE, ticks=FALSE, title="")
points <-
alt$Chart(cars)$
mark_point()$
encode(
x = alt$X("Miles_per_Gallon", axis = alt$Axis(title = "")),
y = alt$Y("Horsepower", axis = alt$Axis(title = "")),
color = alt$condition(brush, "Origin", alt$value("grey"))
)$
properties(selection = brush)
x_ticks <-
alt$Chart(cars)$
mark_tick()$
encode(
x = alt$X("Miles_per_Gallon", axis = tick_axis),
y = alt$Y("Origin", axis = tick_axis_notitle),
color = alt$condition(brush, "Origin", alt$value("lightgrey"))
)$
properties(selection = brush)
y_ticks <-
alt$Chart(cars)$
mark_tick()$
encode(
alt$X("Origin", axis = tick_axis_notitle),
alt$Y("Horsepower", axis = tick_axis),
color=alt$condition(brush, "Origin", alt$value("lightgrey"))
)$
properties(selection = brush)
chart <- (y_ticks | (points & x_ticks))
chart
#> This vegaspec has class `vegaspec_hconcat`, which implies multiple views. Specifying the width or height of a vegaspec with multiple views has no effect on rendering.This example shows how to make a bubble plot showing the correlation between health and income for 187 countries in the world (modified from an example in Lisa Charlotte Rost’s blog post ‘One Chart, Twelve Charting Libraries’).
glimpse(vega_data$gapminder_health_income())
#> Observations: 187
#> Variables: 4
#> $ country <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Ango…
#> $ income <dbl> 1925, 10620, 13434, 46577, 7615, 21049, 17344, 7763, …
#> $ health <dbl> 57.63, 76.00, 76.50, 84.10, 61.00, 75.20, 76.20, 74.4…
#> $ population <dbl> 32526562, 2896679, 39666519, 70473, 25021974, 91818, …glimpse(vega_data$iris())
#> Observations: 150
#> Variables: 5
#> $ petalLength <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.…
#> $ petalWidth <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.…
#> $ sepalLength <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.…
#> $ sepalWidth <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.…
#> $ species <chr> "setosa", "setosa", "setosa", "setosa", "setosa", "s…chart <-
alt$Chart(vega_data$disasters())$
mark_circle(opacity = 0.8, stroke = "black", strokeWidth = 1)$
encode(
x = alt$X("Year:O", axis = alt$Axis(labelAngle = 0)),
y = alt$Y("Entity:N"),
size = alt$Size(
"Deaths:Q",
scale = alt$Scale(range = c(0, 5000)),
legend = alt$Legend(title = "Annual Global Deaths")
),
color = alt$Color("Entity:N", legend = NULL)
)$
properties(
width = 480,
height = 350
)$
transform_filter("datum.Entity != 'All natural disasters'")
chartDefinition
n <- 40
data <-
data_frame(
x = runif(n)**2,
y = 10 - 1 / (x + 0.1) + runif(n)
)
#> Warning: `data_frame()` is deprecated, use `tibble()`.
#> This warning is displayed once per session.
degree <- c(1, 3, 5)
model_poly <-
map(degree, ~lm(y ~ poly(x, degree = .x), data = data))
x_pred <- seq(min(data$x), max(data$x), length.out = 500)
data_predict <-
crossing(degree, x = x_pred) %>%
nest(x) %>%
mutate(y = map2(model_poly, data, predict)) %>%
unnest()glimpse(vega_data$cars())
#> Observations: 406
#> Variables: 9
#> $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 1…
#> $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8,…
#> $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 39…
#> $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 19…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, Na…
#> $ Name <chr> "chevrolet chevelle malibu", "buick skylark 320…
#> $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA"…
#> $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312,…
#> $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01…chart <-
alt$Chart(vega_data$cars())$
mark_circle()$
encode(
x = alt$X(alt$`repeat`("column"), type = "quantitative"),
y = alt$Y(alt$`repeat`("row"), type = "quantitative"),
color = "Origin:N"
)$
properties(width = 200, height = 200)$
`repeat`(
row = list("Horsepower", "Acceleration", "Miles_per_Gallon"),
column = list("Miles_per_Gallon", "Acceleration", "Horsepower")
)$
interactive()
chart
#> This vegaspec has class `vegaspec_repeat`, which implies multiple views. Specifying the width or height of a vegaspec with multiple views has no effect on rendering.Layering can now work with the + operator as it does in Python. Additionally, it can be declared as alt$layer(chart1, chart2) or as alt$ChartLayer(layer = list(chart1, chart2))
This example demonstrates the need for a ggplot2::facet_wrap()-like capability, which we understand is coming to Vega-Lite in the no-so-distant future.
glimpse(vega_data$movies())
#> Observations: 3,201
#> Variables: 16
#> $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary F…
#> $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "Ch…
#> $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "Fin…
#> $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8, 5…
#> $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 353…
#> $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R", "…
#> $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "Dra…
#> $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 1000000,…
#> $ Release_Date <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 199…
#> $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 25,…
#> $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, N…
#> $ Source <list> [NULL, NULL, NULL, NULL, "Original Scree…
#> $ Title <list> ["The Land Girls", "First Love, Last Rit…
#> $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, N…
#> $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819, 2…
#> $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521, 2…